import datetime
import pandas as pd


# Location and naming of the input/output CSV files. The path strings are
# spelled with explicit '\\' so they do not depend on invalid escape
# sequences such as '\S' or '\P'.
_ROOT_DIR = 'D:\\Store\\Python-Download'
_BASELINE_MONTH = '2017-12'  # always part of the "already seen" user history
_FIRST_MONTH = '2018-01'     # first month that has its own new-user cohort
_MONTH_FMT = '%Y-%m'


def _monthly_users_path(area, month):
    """Return the path of the monthly user CSV for ``area`` and ``month``."""
    return f'{_ROOT_DIR}\\{area}月新用户\\{area}月用户_{month}.csv'


def _new_users_path(area, month):
    """Return the path of the new-user CSV for ``area`` and ``month``."""
    return f'{_ROOT_DIR}\\{area}月新用户\\{area}月新用户_{month}.csv'


def _retention_path(area, month):
    """Return the path of the retention report for ``area`` as of ``month``."""
    return f'{_ROOT_DIR}\\{area}新增用户留存率_{month}.csv'


def _months_before(start, end):
    """Return the 'YYYY-MM' strings of every month m with start <= m < end."""
    current = datetime.datetime.strptime(start, _MONTH_FMT)
    stop = datetime.datetime.strptime(end, _MONTH_FMT)
    if stop < current:
        raise ValueError(f'end month {end!r} is earlier than {start!r}')
    months = []
    while current < stop:
        months.append(current.strftime(_MONTH_FMT))
        # Step to the first day of the next calendar month.
        current = current.replace(
            year=current.year + current.month // 12,
            month=current.month % 12 + 1,
        )
    return months


def run(et: str, area: str) -> None:
    """Write the new users of month ``et`` and the retention of earlier cohorts.

    Step 1 reads the monthly user CSVs from 2017-12 up to (but excluding)
    ``et``, and writes the ``f_user_id`` values that occur in the ``et`` file
    but in none of the earlier ones to the new-user CSV of ``et``.

    Step 2 reads, for every month from 2018-01 up to (but excluding) ``et``,
    that month's new-user CSV and computes the share of those users that
    also occur in the ``et`` user file. The new-user files of earlier months
    are only read here, so they must already exist (e.g. from earlier calls
    with smaller ``et``). The rates are written to the retention CSV.

    Args:
        et: End month formatted as 'YYYY-MM'; must not be before 2018-01.
        area: Region label that is embedded in the file names.

    Raises:
        ValueError: If ``et`` cannot be parsed as 'YYYY-MM' or is earlier
            than 2018-01.
        FileNotFoundError: If one of the required CSV files is missing.
    """
    earlier_months = _months_before(_FIRST_MONTH, et)

    # New users: ids present in the end month but in no earlier month.
    history = [pd.read_csv(_monthly_users_path(area, _BASELINE_MONTH))]
    history.extend(
        pd.read_csv(_monthly_users_path(area, month))
        for month in earlier_months
    )
    # DataFrame.append no longer exists in pandas 2.x; one concat also avoids
    # re-copying the accumulated frame on every iteration.
    df_pre = pd.concat(history)
    df_et = pd.read_csv(_monthly_users_path(area, et))
    et_users = set(df_et['f_user_id'])
    new_users = list(et_users - set(df_pre['f_user_id']))
    df_new = pd.DataFrame({'f_user_id': new_users})
    df_new.to_csv(_new_users_path(area, et))

    # Retention: share of each earlier cohort that shows up again in `et`.
    data = []
    for month in earlier_months:
        df_st_new = pd.read_csv(_new_users_path(area, month))
        cohort = set(df_st_new['f_user_id'])
        new_num = df_st_new.shape[0]
        repur_num = len(cohort & et_users)
        print(month, len(cohort), len(et_users))
        # A month without any new users has no defined rate; record NaN
        # instead of failing with ZeroDivisionError.
        repur_rate = repur_num / new_num if new_num else float('nan')
        data.append([month, repur_rate])

    df_result = pd.DataFrame(data, columns=['month', 'repur_rate'])
    df_result.to_csv(_retention_path(area, et))


if __name__ == '__main__':
    # Process every region for each end month. Months stay in ascending
    # order within a region because run() reads the new-user files that
    # earlier months produced.
    jobs = [
        (region, month)
        for region in ('全国', '杭州')
        for month in ('2022-08', '2022-09', '2022-10')
    ]
    for region, month in jobs:
        print(region, month)
        run(month, region)
